First, we are going to bring in two datasets. The first covers CEO compensation in the IT industry (2019). The second is a reduced dataset on the sales price of tractors.
# Read the CEO compensation workbook (IT industry, 2019).
CEO <- read_excel(path = "../data/CEOCompIT.xlsx")
# Read the reduced tractor sales-price workbook.
Tractor <- read_excel(path = "../data/TractorBivariate.xlsx")
# Column-by-column overview of the CEO data.
summary(object = CEO)
## Company Name Total Compensation
## Length:336 Length:336 Min. : 0
## Class :character Class :character 1st Qu.: 2158707
## Mode :character Mode :character Median : 4869462
## Mean : 7247166
## 3rd Qu.: 9628200
## Max. :66935100
# Sample standard deviation of total compensation (not reported by summary()).
sd(CEO[["Total Compensation"]])
## [1] 8027321
# Formatted summary table for total compensation: mean (SD), median with
# quartiles, and range, shown as a multi-row "continuous2" summary.
CEO %>%
  select(`Total Compensation`) %>%
  tbl_summary(
    type = all_continuous() ~ "continuous2",
    statistic = list(
      all_continuous() ~ c(
        "{mean} ({sd})",
        "{median} ({p25}, {p75})",
        "{min}, {max}"
      ),
      all_categorical() ~ "{n} / {N} ({p}%)"
    )
  )
| Characteristic | N = 336 |
|---|---|
| Total Compensation | |
| Mean (SD) | 7,247,166 (8,027,321) |
| Median (IQR) | 4,869,462 (2,158,707, 9,628,200) |
| Range | 0, 66,935,100 |
# Quick base-graphics histogram; "fd" selects the Freedman-Diaconis break rule.
hist(x = CEO$`Total Compensation`, breaks = "fd")
# Rescale total compensation to millions and store it as a new column.
CEO$CompensationMillions <- CEO$`Total Compensation` / 1000000
# Freedman-Diaconis bin width: 2 * IQR / n^(1/3).
binsize <- 2 * IQR(CEO$CompensationMillions) /
  length(CEO$CompensationMillions)^(1 / 3)
# Histogram of compensation (in millions) drawn with the bin width above.
ggplot(CEO, aes(CompensationMillions)) +
  geom_histogram(
    binwidth = binsize, col = "black", fill = "darkblue", alpha = 0.5
  ) +
  labs(
    title = "Distribution of CEO Compensation in IT Industry 2019",
    # The caption used to read "(SourceAFL-CIO)"; the separator is restored.
    caption = "(Source: AFL-CIO)",
    x = "Compensation [millions]",
    y = "Frequency"
  ) +
  theme_bw()
# Base-graphics box plot of compensation in millions.
boxplot(CEO[["CompensationMillions"]])
# Interactive box plot of compensation in millions; each point's text is
# built from the Company and Name columns.
plot_ly(
  y = CEO$CompensationMillions,
  type = "box",
  name = "Compensation [millions]",
  text = paste(CEO$Company, "-", CEO$Name)
) %>%
  layout(title = "Distribution of CEO Compensation in IT Industry 2019")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Base-graphics scatter plot: horsepower on x, sales price on y.
plot(x = Tractor$HorsePower, y = Tractor$`Sales Price`)
# Correlation (cor() default method) between sales price and horsepower.
cor(x = Tractor[["Sales Price"]], y = Tractor[["HorsePower"]])
## [1] 0.6643812
# Scatter plot of sales price against horsepower with a fitted straight line.
ggplot(Tractor, aes(HorsePower, `Sales Price`)) +
  geom_point(color = "gray40") +
  # Linear-model fit without the standard-error band. FALSE is spelled out
  # because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE, color = "darkblue") +
  theme_bw() +
  labs(title = "Relationship between Horsepower and Sales Price of Tractors")